Unless we use a descriptor that is invariant to affine transformations (e.g. rotation, scale, ...), we need to normalize the face beforehand.
We use LBP in its basic form, because the rotation-invariant version loses relative orientation: that version of LBP is lossy, unlike the Scattering Transform [1]. Strictly speaking, we only need a rotation normalization, because scale-invariant descriptors are easier to create.
By normalizing, we get rid of the distances between fiducial points, which can characterize a face.
We could try using a multimodal approach in order to keep this information.
In [1]:
%pylab inline
from tools import *
from alignment import *
import cv2
# Read the sample images. readImagesInDir and sample_directory come from the
# star imports above; the call presumably returns file names and images as two
# parallel sequences - TODO confirm against tools.py.
imgs_name, imgs = readImagesInDir(sample_directory)
The computation is done using the CSIRO alignment system.
In [2]:
# Landmark detector used by the following cells; a later cell rebinds this
# name to an LBFLandmarkDetector.
landmark_detector = CSIROLandmarkDetector()
The affine transform is computed from several landmarks, either through an average / median of the different possible transforms or using a least-squares estimate (LSE).
Landmarks used :
In [3]:
import skimage.transform as tfm
# Reference positions of the five alignment points in the normalized image.
# Row i is paired with row i of normalizationLandmarks' result when the
# transform is estimated.
normalized_landmarks = np.array([
    [125.0, 140.0],
    [100.0, 110.0],
    [150.0, 110.0],
    [125.0, 90.0],
    [125.0, 175.0],
])


def normalizationLandmarks(landmarks):
    """Reduce a full landmark set to the five points used for alignment.

    Args:
        landmarks: indexable sequence of points; indices up to 64 are read.

    Returns:
        An array with one row per alignment point, in this order:
        landmarks[30], the mean of landmarks[36:41], the mean of
        landmarks[42:47], the midpoint of landmarks[19] and landmarks[24],
        and landmarks[64].
    """
    # np.mean is called explicitly: the bare `mean` injected by %pylab is
    # rebound to a float by the accuracy-report cell further down, which made
    # this function fail when re-run afterwards.
    # NOTE(review): the slices 36:41 and 42:47 leave out indices 41 and 47; if
    # the eye contours are 36..41 and 42..47 inclusive, one point per eye is
    # ignored - confirm this is intended.
    # An earlier variant used the mean of landmarks[48:65] as the last point
    # instead of landmarks[64].
    return np.array([
        landmarks[30],
        np.mean(np.array(landmarks[36:41]), axis=0),
        np.mean(np.array(landmarks[42:47]), axis=0),
        np.mean(np.array([landmarks[19], landmarks[24]]), axis=0),
        landmarks[64],
    ])
# Demo on the first sample image: estimate the affine map, warp the image and
# overlay the alignment points.
landmarks = normalizationLandmarks(landmark_detector.detectLandmarks(imgs[0]))
# The transform is estimated from reference coordinates to image coordinates;
# it is handed to tfm.warp below as the output -> input mapping.
transform = tfm.estimate_transform("affine", normalized_landmarks, landmarks)
output = np.array(imgs[0], copy=True)
for landmark in landmarks:
    # astype(int) replaces the np.int alias (removed in NumPy 1.24); tolist()
    # gives plain Python ints for the OpenCV point argument.
    cv2.circle(output, tuple(landmark.astype(int).tolist()), 2, (255, 0, 0))
output = tfm.warp(output, transform)
# Detected points mapped into the normalized frame, next to their targets.
for landmark, normalized_landmark in zip(transform.inverse(landmarks), normalized_landmarks):
    cv2.circle(output, tuple(landmark.astype(int).tolist()), 2, (0, 255, 0))
    cv2.circle(output, tuple(normalized_landmark.astype(int).tolist()), 2, (0, 0, 255))
imshow(output)
Out[3]:
In [4]:
def normalizationPointsChoiceTests(imgs, normalized_landmarks, normalizationFunction):
    """Warp each image onto a reference layout using a chosen landmark subset.

    Uses the module-level ``landmark_detector`` to detect landmarks.

    Args:
        imgs: iterable of images.
        normalized_landmarks: reference positions, one row per point returned
            by ``normalizationFunction``.
        normalizationFunction: callable turning the detector output into the
            points used to estimate the affine transform.

    Returns:
        A list with one warped image per input image; the selected landmarks
        are drawn on a copy of the image before warping.
    """
    outputs = []
    for img in imgs:
        landmarks = normalizationFunction(landmark_detector.detectLandmarks(img))
        output = np.array(img, copy=True)  # draw on a copy, keep the input intact
        for landmark in landmarks:
            # astype(int) replaces the np.int alias (removed in NumPy 1.24);
            # tolist() gives plain Python ints for the OpenCV point argument.
            cv2.circle(output, tuple(landmark.astype(int).tolist()), 2, (255, 0, 0))
        transform = tfm.estimate_transform("affine", normalized_landmarks, landmarks)
        outputs.append(tfm.warp(output, transform))
    return outputs
# Accumulator for the final comparison mosaic: starts with copies of the
# original images; the cells below append the outputs of each method.
landmarks_choices_output = [np.array(img, copy=True) for img in imgs]
In [5]:
from itertools import *
def computeMedianAffineTransform(landmarks, normalized_landmarks):
    """Element-wise median of the affine transforms fitted on every landmark triple.

    Both arguments must be numpy arrays with matching rows, because they are
    indexed with an index array. One affine transform (reference -> image) is
    estimated per combination of three rows, and the median of the resulting
    parameter matrices is wrapped in a ``tfm.AffineTransform``.
    """
    point_count = len(landmarks)
    candidates = [
        tfm.estimate_transform(
            "affine",
            normalized_landmarks[np.array(triple)],
            landmarks[np.array(triple)],
        ).params.copy()
        for triple in combinations(range(point_count), 3)
    ]
    return tfm.AffineTransform(matrix=np.median(candidates, axis=0))
# Reference positions of the five alignment points in the normalized image.
# Row i is paired with row i of normalizationLandmarks' result when the
# transform is estimated. Same values as in the earlier cell.
normalized_landmarks = np.array([
    [125.0, 140.0],
    [100.0, 110.0],
    [150.0, 110.0],
    [125.0, 90.0],
    [125.0, 175.0],
])


def normalizationLandmarks(landmarks):
    """Reduce a full landmark set to the five points used for alignment.

    Args:
        landmarks: indexable sequence of points; indices up to 64 are read.

    Returns:
        An array with one row per alignment point, in this order:
        landmarks[30], the mean of landmarks[36:41], the mean of
        landmarks[42:47], the midpoint of landmarks[19] and landmarks[24],
        and landmarks[64].
    """
    # np.mean is called explicitly: the bare `mean` injected by %pylab is
    # rebound to a float by the accuracy-report cell further down, which made
    # this function fail when re-run afterwards.
    # NOTE(review): the slices 36:41 and 42:47 leave out indices 41 and 47 -
    # confirm this is intended.
    return np.array([
        landmarks[30],
        np.mean(np.array(landmarks[36:41]), axis=0),
        np.mean(np.array(landmarks[42:47]), axis=0),
        np.mean(np.array([landmarks[19], landmarks[24]]), axis=0),
        landmarks[64],
    ])
# Align every sample image with the median-of-triples affine transform and
# add the results to the comparison mosaic.
outputs = []
for img in imgs:
    landmarks = normalizationLandmarks(
        landmark_detector.detectLandmarks(img)
    )
    transform = computeMedianAffineTransform(landmarks, normalized_landmarks)
    # The transform maps reference coordinates to image coordinates, i.e. the
    # output -> input direction that warp takes as its second argument.
    outputs.append(tfm.warp(img, transform))
landmarks_choices_output.extend(outputs)
In [6]:
from itertools import *
def computeSimilarityTransform(landmarks, normalized_landmarks):
    """Least-squares similarity transform from the reference points to the landmarks.

    Solves for (a, b, tx, ty) in

        x' = a * x - b * y + tx
        y' = b * x + a * y + ty

    where (x, y) are rows of ``normalized_landmarks`` and (x', y') the
    matching rows of ``landmarks``. Both must be numpy arrays holding one 2-D
    point per row. Returns the result as a ``tfm.AffineTransform``.
    """
    count = landmarks.shape[0]
    ref_x = normalized_landmarks[:, 0]
    ref_y = normalized_landmarks[:, 1]

    # Two equations per point: even rows hold the x' equation, odd rows y'.
    design = np.zeros((2 * count, 4))
    design[:, 0] = normalized_landmarks.reshape((2 * count, 1))[:, 0]
    design[0::2, 1] = -ref_y
    design[1::2, 1] = ref_x
    design[0::2, 2] = 1
    design[1::2, 3] = 1

    # Targets interleaved the same way: x'0, y'0, x'1, y'1, ...
    targets = landmarks.reshape((2 * count, 1))[:, 0]

    solution = np.linalg.lstsq(design, targets)[0]
    a, b, shift_x, shift_y = solution[0], solution[1], solution[2], solution[3]
    matrix = np.array([[a, -b, shift_x], [b, a, shift_y], [0, 0, 1]])
    return tfm.AffineTransform(matrix=matrix)
# Reference positions of the five alignment points in the normalized image.
# Row i is paired with row i of normalizationLandmarks' result when the
# transform is estimated. Same values as in the earlier cells.
normalized_landmarks = np.array([
    [125.0, 140.0],
    [100.0, 110.0],
    [150.0, 110.0],
    [125.0, 90.0],
    [125.0, 175.0],
])


def normalizationLandmarks(landmarks):
    """Reduce a full landmark set to the five points used for alignment.

    Args:
        landmarks: indexable sequence of points; indices up to 64 are read.

    Returns:
        An array with one row per alignment point, in this order:
        landmarks[30], the mean of landmarks[36:41], the mean of
        landmarks[42:47], the midpoint of landmarks[19] and landmarks[24],
        and landmarks[64].
    """
    # np.mean is called explicitly: the bare `mean` injected by %pylab is
    # rebound to a float by the accuracy-report cell further down, which made
    # this function fail when re-run afterwards.
    # NOTE(review): the slices 36:41 and 42:47 leave out indices 41 and 47 -
    # confirm this is intended.
    return np.array([
        landmarks[30],
        np.mean(np.array(landmarks[36:41]), axis=0),
        np.mean(np.array(landmarks[42:47]), axis=0),
        np.mean(np.array([landmarks[19], landmarks[24]]), axis=0),
        landmarks[64],
    ])
# Align every sample image with the least-squares similarity transform.
outputs = []
for img in imgs:
    landmarks = normalizationLandmarks(landmark_detector.detectLandmarks(img))
    transform = computeSimilarityTransform(landmarks, normalized_landmarks)
    outputs.append(tfm.warp(img, transform))

# Visual check on the last image only: `landmarks` and `transform` still hold
# the values of the final loop iteration. The overlay is drawn on a copy so
# the image stored in `outputs` stays clean.
output = np.copy(outputs[-1])
for landmark, normalized_landmark in zip(transform.inverse(landmarks), normalized_landmarks):
    # astype(int) replaces the np.int alias (removed in NumPy 1.24); tolist()
    # gives plain Python ints for the OpenCV point argument.
    cv2.circle(output, tuple(landmark.astype(int).tolist()), 2, (0, 255, 0), -1)
    cv2.circle(output, tuple(normalized_landmark.astype(int).tolist()), 2, (0, 0, 255), -1)
imshow(output)
landmarks_choices_output += outputs
C++ version
In [7]:
# Same experiment with the FaceNormalization object (the "C++ version"),
# configured with the detector's own reference shape.
face_normalization = FaceNormalization()
face_normalization.setReferenceShape(landmark_detector.getReferenceShape())

outputs = []
for img in imgs:
    landmarks = landmark_detector.extractLandmarksForNormalization(
        landmark_detector.detectLandmarks(img)
    )
    # normalize's return value is not used, so it is expected to modify its
    # argument in place; work on a copy to keep the input image intact.
    output = np.copy(img)
    face_normalization.normalize(output, landmarks)
    outputs.append(output)
landmarks_choices_output.extend(outputs)
In [8]:
# landmarks_choices_output holds len(imgs) images per method (originals
# first), so ncols=len(imgs) presumably puts one method per row - TODO confirm
# showMosaic's fill order.
showMosaic(landmarks_choices_output, ncols=len(imgs))
Issue: if the subject's head is slightly turned, the affine transformation estimation fails...
The estimate used by scikit-image is a total least-squares estimate. Maybe we should try OpenCV's estimateRigidTransform (which actually estimates an affine transformation)?
The median of the affine transformations turns out to be the best. But is it a real affine transformation ?
=> Is the better precision due to the median being robust to outliers?
The timings are not really comparable (the median affine transformation is computed with inefficient Python code).
In [11]:
import config
from itertools import imap
from stats import *
from datasets import lfw
from benchmarks import lfw as lfw_bench
# Benchmark set-up: one descriptor file per normalization method.
sets_ground_truth = lfw.loadSetsGroundTruth()
# labels[i] is the plot/report label for descs_files[i]; keep both lists in the same order.
labels = ["LFW-a baseline results", "LBF 51 landmarks normalized faces", "LBF 68 landmarks normalized faces", "CSIRO normalized faces", "LFW (no normalization)"]
descs_files = ["ulbp_wpca_lfwa", "ulbp_wpca_lfw_normalized_lbf_51_landmarks" ,"ulbp_wpca_lfw_normalized_lbf_68_landmarks", "ulbp_wpca_lfw_normalized_csiro", "ulbp_wpca_lfw"]
# imap loads the descriptor files lazily, one at a time, as the comprehension consumes them (Python 2 only).
scores = [lfw_bench.computeDistanceMatrix(descs, sets_ground_truth) for descs in imap(lfw_bench.loadDescriptors, descs_files)]
rocs = [lfw_bench.computeMeanROC(score) for score in scores]
In [12]:
# One ROC curve per normalization method, labelled in the same order as `rocs`.
plotROC(rocs, labels, title="ROC curve for different normalization methods")
In [13]:
# Print the two values returned by computeMeanAccuracy for each method.
# The results are deliberately not named `mean` / `std`: this is notebook
# global scope, and those names would shadow the functions that
# `%pylab inline` put in the namespace.
for label, score in zip(labels, scores):
    mean_accuracy, std_accuracy = lfw_bench.computeMeanAccuracy(score)
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print("%s: %0.4f +/- %0.4f" % (label, mean_accuracy, std_accuracy))
In [15]:
# Load the dataset stored under the name "lfw_normalized_csiro".
data = lfw.loadData("lfw_normalized_csiro")
In [16]:
# Inspect the first 40 images. markLandmarks' return value is not used, so it
# presumably draws on the images in place - TODO confirm.
markLandmarks(data[:40], color=(255,255,255))
showMosaic(data[:40], ncols=5)
In [17]:
# Time landmark detection on `output`, i.e. whatever image the last executed
# cell left in that variable.
%timeit landmark_detector.detectLandmarks(output)
In [18]:
# Load the dataset stored under "lfw_normalized_lbf_51_landmarks" and show its first 40 images.
data = lfw.loadData("lfw_normalized_lbf_51_landmarks")
showMosaic(data[:40], ncols=5)
In [21]:
from cpp_wrapper.face_detection import *
# Two face detectors: a high-recall one and the default one; the
# normalization loop further down prefers the default detector's result when
# it finds a face.
high_recall_face_detector = FaceDetector(high_recall=True)
face_detector = FaceDetector()
# Rebinds landmark_detector (previously the CSIRO detector) to a 68-landmark LBF detector.
landmark_detector = LBFLandmarkDetector(detector="opencv", landmarks=68)
In [22]:
# Full pipeline on every sample image: face detection, landmark detection,
# overlay drawing, then normalization with the detector's reference shape.
face_normalization = FaceNormalization()
face_normalization.setReferenceShape(landmark_detector.getReferenceShape())

outputs = []
for img in imgs:
    output = np.copy(img)
    faces = high_recall_face_detector.detectFaces(output)
    # len() is used rather than truthiness because the detector's return type
    # is not visible here (an array would make `if faces:` ambiguous).
    if len(faces) > 0:
        face = faces[0]
        # Prefer the default detector's first face when it finds one; the
        # high-recall result is only the fallback.
        faces = face_detector.detectFaces(output)
        if len(faces) > 0:
            face = faces[0]
        landmarks = landmark_detector.detectLandmarks(output, face)
        cv2.rectangle(output, face[:2], face[2:], (0, 0, 0), 2)
        for landmark in landmarks:
            # astype(int) replaces the np.int alias (removed in NumPy 1.24);
            # tolist() gives plain Python ints for the OpenCV point argument.
            cv2.circle(output, tuple(landmark.astype(int).tolist()), 2, (0, 255, 0), -1)
        normalization_landmarks = landmark_detector.extractLandmarksForNormalization(landmarks)
        face_normalization.normalize(output, normalization_landmarks)
    # One entry per input image; when no face was found the copy is stored
    # unmodified.
    outputs.append(output)
In [23]:
# Show the normalized images, five per row.
showMosaic(outputs, ncols=5)
In [24]:
# Single-image experiment: normalize the third sample image, detect again on
# the normalized result and fit an ellipse to the first 17 landmarks.
img = np.copy(imgs[2])
# Indexing with [0] raises IndexError if no face is detected.
face = face_detector.detectFaces(img)[0]
landmarks = landmark_detector.detectLandmarks(img, face)
normalization_landmarks = landmark_detector.extractLandmarksForNormalization(landmarks)
# The return value is ignored, so normalize is expected to modify img in place.
face_normalization.normalize(img, normalization_landmarks)

# Second pass, on the normalized image.
face = face_detector.detectFaces(img)[0]
landmarks = landmark_detector.detectLandmarks(img, face)
# NOTE(review): landmarks[:17] is presumably the jaw contour of the
# 68-landmark layout - confirm against the detector's landmark order.
ellipse = cv2.fitEllipse(landmarks[:17].astype(np.float32))
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print(ellipse)
cv2.ellipse(img, ellipse, (255, 0, 0), 2)
cv2.rectangle(img, face[:2], face[2:], (0, 0, 0), 2)
for landmark in landmarks:
    # astype(int) replaces the np.int alias (removed in NumPy 1.24); tolist()
    # gives plain Python ints for the OpenCV point argument.
    cv2.circle(img, tuple(landmark.astype(int).tolist()), 2, (0, 255, 0), -1)
imshow(img)
Out[24]:
In [25]:
# Crop a fixed margin off every image in `outputs` (49 rows at the top and
# bottom, 84 columns at the left and right) and print the shape of the first
# image before and after.
# NOTE: not idempotent - re-running this cell crops the images again.
print(outputs[0].shape)
# Slice assignment keeps `outputs` the same list object, as the original
# index-based loop did.
outputs[:] = [uncropped[49:-49, 84:-84] for uncropped in outputs]
print(outputs[0].shape)
In [26]:
# Show the images again, now cropped, five per row.
showMosaic(outputs, ncols=5)
In [30]:
# Time landmark detection and normalization on the first dataset image.
# `face` and `normalization_landmarks` are whatever the previously executed
# cells left behind, so they were not computed for data[0].
%timeit landmark_detector.detectLandmarks(data[0], face)
%timeit face_normalization.normalize(data[0], normalization_landmarks)
In [24]: